import concurrent.futures as cf
import time
from dataclasses import dataclass
from pathlib import Path

import pandas as pd
import requests


# Two-letter country prefixes. main() removes every region code whose first two
# characters appear in either set before any Eurostat request is made.
EXCLUDE_PREFIXES = {"IL", "RU", "UA"}  # not covered by NUTS-style Eurostat regional tables in practice
DROP_PREFIXES = {"RS"}  # Serbia has NUTS-like codes but is not covered by demo_r_* tables we use here


@dataclass(frozen=True)
class EurostatQuery:
    """Immutable description of one Eurostat dataset request.

    Attributes:
        dataset: Dataset code appended to the Eurostat API base URL.
        params: Fixed query-string filters sent with every request. The
            per-region ``geo`` filter is added by ``fetch_value`` on a copy,
            so this mapping is never modified.
    """

    dataset: str
    params: dict[str, str]


def _first_value(payload: object) -> float | None:
    """Return the first entry of a response's ``value`` member as a float.

    The ``value`` member is accepted either as a mapping (index -> value) or
    as a plain list. Anything else, an empty container, or a non-numeric
    first entry (including ``null``) yields ``None``.
    """
    values = payload.get("value") if isinstance(payload, dict) else None
    if isinstance(values, dict):
        first = next(iter(values.values()), None)
    elif isinstance(values, list) and values:
        first = values[0]
    else:
        return None
    try:
        return float(first)
    except (TypeError, ValueError):
        return None


def fetch_value(query: EurostatQuery, geo: str) -> float | None:
    """Fetch a single observation for one region from the Eurostat statistics API.

    The request targets ``query.dataset`` with ``query.params`` plus the
    ``geo`` filter. Transient failures (connection errors, timeouts, HTTP
    5xx / 408 / 429, undecodable JSON) are retried with exponential backoff,
    up to six attempts in total. Other 4xx responses are treated as
    permanent for this query and are not retried.

    Args:
        query: Dataset code and fixed filter parameters.
        geo: Region code sent as the ``geo`` filter.

    Returns:
        The first entry of the response's ``value`` member as a float, or
        ``None`` when the response carries no usable value or the request
        ultimately fails.
    """
    url = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/" + query.dataset
    params = {**query.params, "geo": geo}  # copy: the query object stays untouched
    headers = {"User-Agent": "ess-research/1.0"}

    max_attempts = 6
    backoff = 0.8
    for attempt in range(max_attempts):
        try:
            r = requests.get(url, params=params, timeout=60, headers=headers)
            r.raise_for_status()
            js = r.json()
        except requests.HTTPError as exc:
            status = exc.response.status_code if exc.response is not None else None
            # A client error will not change on retry; only 408 (request
            # timeout) and 429 (rate limited) are worth waiting for.
            if status is not None and 400 <= status < 500 and status not in (408, 429):
                return None
        except (requests.ConnectionError, requests.Timeout, ValueError):
            # ValueError covers a body that is not valid JSON.
            pass
        else:
            # Single geo/time query => take first value.
            return _first_value(js)

        # Do not sleep after the final attempt.
        if attempt < max_attempts - 1:
            time.sleep(backoff)
            backoff = min(backoff * 2, 10)
    return None


def fetch_series(query: EurostatQuery, geos: list[str], max_workers: int = 4) -> pd.DataFrame:
    rows: list[dict] = []
    with cf.ThreadPoolExecutor(max_workers=max_workers) as ex:
        futs = {ex.submit(fetch_value, query, g): g for g in geos}
        for fut in cf.as_completed(futs):
            g = futs[fut]
            v = fut.result()
            if v is not None:
                rows.append({"region": g, "value": v})
    return pd.DataFrame(rows)


def _region_codes(raw: pd.Series) -> list[str]:
    """Return the sorted, de-duplicated, upper-cased region codes in *raw*.

    Missing values are dropped before the string conversion: converting first
    can turn NaN/None into the literal strings "NAN"/"NONE", which would then
    be requested from Eurostat as if they were region codes. Empty strings and
    the "99999" code are discarded too (presumably a "no region" sentinel;
    confirm against the ESS codebook).
    """
    codes = raw.dropna().astype(str).str.upper().str.strip()
    return sorted({code for code in codes.unique().tolist() if code and code != "99999"})


def main() -> None:
    """Build the 2015 regional cost-shifter CSV from Eurostat unless it already exists.

    Region codes come from the broadband region file when it is present,
    otherwise from the ESS extract (rows with ``regunit`` 1 or 2). Codes with an
    excluded country prefix are removed, then density, area and population are
    fetched per region and outer-merged into one table written to
    ``data_external/cost_shifters_region_2015_eurostat.csv``.

    Raises:
        SystemExit: If no value at all could be fetched. Nothing is written in
            that case, so a later run is not short-circuited by an empty file.
    """
    root = Path(__file__).resolve().parents[1]
    out_path = root / "data_external" / "cost_shifters_region_2015_eurostat.csv"
    out_path.parent.mkdir(parents=True, exist_ok=True)

    if out_path.exists():
        print(f"Exists, skipping: {out_path}")
        return

    # Prefer already-used Eurostat regional broadband set (smaller + more stable codes).
    # The ESS extract is only read when that file is absent.
    reg_path = root / "data_external" / "broadband_region_year_eurostat_isoc_r_broad_h.csv"
    if reg_path.exists():
        regions = _region_codes(pd.read_csv(reg_path)["region"])
    else:
        ess = pd.read_parquet(root / "outputs" / "ess_r8_r11_min.parquet")
        regions = _region_codes(ess.loc[ess["regunit"].isin([1, 2]), "region"])

    skip_prefixes = EXCLUDE_PREFIXES | DROP_PREFIXES
    regions = [r for r in regions if r[:2] not in skip_prefixes]

    dens_q = EurostatQuery("demo_r_d3dens", {"time": "2015", "unit": "PER_KM2"})
    area_q = EurostatQuery("demo_r_d3area", {"time": "2015", "unit": "KM2", "landuse": "TOTAL"})
    pop_q = EurostatQuery("demo_r_pjanaggr3", {"time": "2015", "unit": "NR", "sex": "T", "age": "TOTAL"})

    dens = fetch_series(dens_q, regions).rename(columns={"value": "pop_density_2015_per_km2"})
    area = fetch_series(area_q, regions).rename(columns={"value": "area_2015_km2"})
    pop = fetch_series(pop_q, regions).rename(columns={"value": "pop_2015"})

    # An empty file would be treated as a finished result by the exists-check above.
    if dens.empty and area.empty and pop.empty:
        raise SystemExit(f"No Eurostat values could be fetched; not writing {out_path}")

    merged = dens.merge(area, on="region", how="outer").merge(pop, on="region", how="outer")
    merged["source"] = "Eurostat API"
    merged["notes"] = "Baseline (2015) cost shifters for Bartik-style instruments; density/area/pop are time-invariant inputs."

    merged = merged.sort_values("region")
    merged.to_csv(out_path, index=False, encoding="utf-8")
    print(f"Wrote: {out_path} rows={len(merged):,} coverage={merged['region'].nunique():,}/{len(regions):,} regions")


# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
